#!/usr/bin/python
# Filename: replace_rare.py

from collections import Counter

# Default input/output locations used when the file is run as a script.
TRAIN_PATH = 'ner_train.dat'
OUTPUT_PATH = 'ner_train_rep.dat'

# Placeholder written in place of every rare word.
RARE_TOKEN = '_RARE_'

# A word is kept only when it occurs MORE than this many times, so a word
# seen exactly RARE_THRESHOLD times is still replaced.
# NOTE(review): confirm whether the intended cut-off is "> 5" or ">= 5".
RARE_THRESHOLD = 5


def _fields(line):
    """Return the space-separated fields of *line*.

    Surrounding whitespace (including the line terminator) is removed first.
    Both passes over the data go through this helper so that the key used
    for counting is always the key used for the later lookup.
    """
    return line.strip().split(' ')


def count_words(lines):
    """Count how often each word occurs as the first field of a line.

    Args:
        lines: iterable of text lines, each expected to look like
            ``"<word> <tag>"``.

    Returns:
        A ``collections.Counter`` mapping word -> number of lines that start
        with it.  Blank lines are not counted.
    """
    words = (_fields(line)[0] for line in lines)
    return Counter(word for word in words if word)


def replace_rare(src_path, dst_path, rare_threshold=RARE_THRESHOLD,
                 rare_token=RARE_TOKEN):
    """Copy *src_path* to *dst_path*, masking rare words with *rare_token*.

    A line whose first field occurs more than *rare_threshold* times in the
    whole file is copied unchanged.  Any other non-blank line is rewritten as
    ``"<rare_token> <second field>\\n"``.  Blank lines are copied unchanged.

    Args:
        src_path: path of the input file to read.
        dst_path: path of the output file; it is created or truncated.
        rare_threshold: highest occurrence count that is still "rare".
        rare_token: replacement text for rare words.

    Raises:
        ValueError: if a rare line has no second field to carry over.
        IOError/OSError: if either file cannot be opened.
    """
    with open(src_path) as src:
        counts = count_words(src)
        # Second pass over the same handle: rewind instead of reopening.
        src.seek(0)
        # The output is opened only after counting succeeded, so a missing
        # or unreadable input never leaves an empty output file behind.
        with open(dst_path, 'w') as dst:
            for line_no, line in enumerate(src, 1):
                fields = _fields(line)
                word = fields[0]
                if not word or counts[word] > rare_threshold:
                    dst.write(line)
                    continue
                if len(fields) < 2:
                    raise ValueError(
                        '%s:%d: expected "<word> <tag>", got %r'
                        % (src_path, line_no, line))
                dst.write(rare_token + ' ' + fields[1] + '\n')


if __name__ == '__main__':
    replace_rare(TRAIN_PATH, OUTPUT_PATH)
		
		
